# -*- coding: utf-8 -*-
import json
import math

import scrapy

from apps.models import OTA
from apps.models.comment_scrapy import CommentMeituan
from apps.models.data_scrapy import DataMeituan

'''
触发了美团反爬机制 ToDo待解决
在重试机制上还可以优化
'''


class MeituanSpider(scrapy.Spider):
    """Scrape Meituan POI comments and upsert them into MongoDB.

    Flow: ``parse`` issues one small request per spot to learn the comment
    total; ``parse_data`` stores the total/tags and fans out paginated
    comment requests; ``parse_comment`` upserts each comment document.

    NOTE(review): Meituan's anti-scraping countermeasures are still an open
    TODO, and the retry-on-bad-JSON scheme below is unbounded — consider
    Scrapy's RETRY middleware for a capped retry count.
    """
    name = 'meituan'
    allowed_domains = ['www.meituan.com']
    start_urls = ['https://www.meituan.com/ptapi/poi/getcomment?id=1515791&offset=0&pageSize=10&mode=0&sortType=1']

    # NOTE: the {page_num} placeholder fills the API's ``offset`` query
    # parameter, i.e. an ITEM offset, not a page index — callers must pass
    # page_index * page_size.
    base_url = r'https://www.meituan.com/ptapi/poi/getcomment?id={ota_spot_id}&offset={page_num}&pageSize={page_size}&mode=0&sortType=1'
    ota_spot_ids = OTA.OtaSpotIdMap.get_ota_spot_list(OTA.OtaCode.MEITUAN)  # ota spot-id list for this OTA
    page_size = 50  # Meituan caps pageSize at 50

    def parse(self, response):
        """Issue one probe request per spot to fetch its comment total."""
        for ota_spot_id in self.ota_spot_ids:
            # Small page (10 items) — we only need the 'total' field here.
            url = self.base_url.format(ota_spot_id=ota_spot_id, page_num=0, page_size=10)

            yield scrapy.Request(url=url, callback=self.parse_data, dont_filter=True,
                                 meta={'ota_spot_id': ota_spot_id})

    def parse_data(self, response):
        """Store spot-level totals/tags, then fan out one request per page.

        Retries the same request (unfiltered) when the response body is not
        valid JSON, which is how Meituan's anti-bot page manifests here.
        """
        print('--------从 ', response.url, ' 中爬取数据---------------')
        try:
            result = json.loads(response.body)
            print(result['total'])

            ota_spot_id = response.meta['ota_spot_id']
            total = result['total']
            tags = result['tags']

            # Upsert the spot summary (comment total + tag cloud).
            DataMeituan.objects(ota_spot_id=ota_spot_id).update_one(
                set__tags=tags,
                set__total=total,
                upsert=True
            )
            # ceil() already rounds up, so range(total_page) covers every
            # comment; the old range(total_page + 1) fetched one empty page.
            total_page = math.ceil(total / self.page_size)
            for page_num in range(total_page):
                print('---------开始爬取第 ', page_num, ' 页------------')
                # BUGFIX: the API's ``offset`` is an item offset, so page N
                # starts at N * page_size (the raw page index made pages
                # overlap and skipped most comments).
                url = self.base_url.format(page_num=page_num * self.page_size,
                                           page_size=self.page_size, ota_spot_id=ota_spot_id)
                yield scrapy.Request(url=url, method='GET',
                                     callback=self.parse_comment, dont_filter=True,
                                     meta={'ota_spot_id': response.meta['ota_spot_id'], 'page_num': page_num})
        except json.decoder.JSONDecodeError:
            # Non-JSON body (anti-bot response): re-enqueue the same request.
            r = response.request.copy()
            r.dont_filter = True
            yield r

    def parse_comment(self, response):
        """Upsert every comment on one page, keyed by (spot id, review id)."""
        try:
            result = json.loads(response.body)
            print('开始插入数据------------------------', response.meta['page_num'])
            # 'comments' may be absent/None on the last page — treat as empty.
            for value in result.get('comments') or []:
                ota_spot_id = response.meta['ota_spot_id']
                review_id = value['reviewId']

                user_name = value['userName']
                user_url = value['userUrl']
                comment = value['comment']
                pic_urls = value['picUrls']
                comment_time = value['commentTime']
                reply_cnt = value['replyCnt']
                zan_cnt = value['zanCnt']
                read_cnt = value['readCnt']
                user_id = value['userId']
                star = value['star']
                menu = value['menu']

                CommentMeituan.objects(ota_spot_id=ota_spot_id, review_id=review_id).update_one(
                    set__user_name=user_name,
                    set__user_url=user_url,
                    set__comment=comment,
                    set__pic_urls=pic_urls,
                    set__comment_time=comment_time,
                    set__reply_cnt=reply_cnt,
                    set__zan_cnt=zan_cnt,
                    set__read_cnt=read_cnt,
                    set__user_id=user_id,
                    set__star=star,
                    set__menu=menu,
                    upsert=True
                )
        except json.decoder.JSONDecodeError:
            # Non-JSON body (anti-bot response): re-enqueue the same request.
            r = response.request.copy()
            r.dont_filter = True
            yield r
